import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
# Render figures inline in the notebook. `InteractiveShell.magic()` is a
# deprecated IPython API; `run_line_magic(name, line)` is its replacement.
get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import pandas as pd
import matplotlib.cm as cm
from scipy import stats
from pandasql import *
# Never truncate columns or rows when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Raw input: a tab-separated text file.
dataset = pd.read_csv('trades_count_regression_2016-11-18.txt', delimiter='\t')
dataset.head()
dataset.shape

# Sum DayTradeTotal over every row that shares the same product type, product
# name and date columns; the result has one row per such combination.
q='''select ProductType, ProductName, Date, Day, DayofMonth, sum(DayTradeTotal) DayTradeTotal
from dataset
group by ProductType, ProductName, Date, Day, DayofMonth;'''
df = sqldf(q, locals())
df.head()

# Box-Cox transform of the aggregated totals. stats.boxcox returns the
# transformed values together with the fitted lambda; only the values are kept.
# NOTE(review): scipy's boxcox requires strictly positive input - confirm that
# DayTradeTotal can never be zero after aggregation.
_bc_values, _bc_lambda = stats.boxcox(df['DayTradeTotal'])
df['TradeTotBCTrans'] = _bc_values
df.head()
# Histogram of the aggregated trade totals before the transform.
# Each histogram is drawn on its own figure: without the explicit
# plt.figure() calls both plt.hist() calls land on the same axes when this
# runs as a single script, and the second PNG would show both distributions.
plt.figure()
plt.hist(df['DayTradeTotal'])
plt.title("Original Trade Volume")
plt.savefig('strata_noMat_skewHist.png', bbox_inches='tight')

# Histogram of the same totals after the Box-Cox transform.
plt.figure()
plt.hist(df['TradeTotBCTrans'])
plt.title("Box Cox Transformed Trade Volume")
plt.savefig('strata_noMat_Hist.png', bbox_inches='tight')
def _plot_product_type_grid(data, x_col, x_label, figsize, out_file,
                            alpha=.7, n_cols=3):
    """Save a grid of scatter plots, one panel per product type.

    Each panel plots ``TradeTotBCTrans`` against ``x_col`` for the rows of one
    ``ProductType``; points are coloured by ``ProductName`` and a legend
    listing the product names is placed to the right of the panel.

    Parameters
    ----------
    data : DataFrame with the columns ``ProductType``, ``ProductName``,
        ``TradeTotBCTrans`` and ``x_col``.
    x_col : name of the column plotted on the x axis.
    x_label : text used as the x-axis label.
    figsize : (width, height) passed to ``plt.figure``.
    out_file : path the finished figure is saved to.
    alpha : opacity used for both the markers and the legend patches.
    n_cols : number of panels per grid row.
    """
    product_types = np.unique(data.ProductType)
    # Keep the 2-row layout for up to 2 * n_cols product types and add rows
    # only when there are more, so plt.subplot never gets an out-of-range index.
    n_rows = max(2, -(-len(product_types) // n_cols))

    plt.figure(figsize=figsize)
    # Spacing is a property of the figure, so it is set once up front.
    plt.subplots_adjust(hspace=.3, wspace=.5)

    for panel, product_type in enumerate(product_types, 1):
        subset = data.loc[data.ProductType == product_type].sort_values(by=x_col)
        # `names` holds the sorted distinct product names; `codes` gives, for
        # every row, the position of its name in `names` (the colour index).
        names, codes = np.unique(subset.ProductName.values, return_inverse=True)

        plt.subplot(n_rows, n_cols, panel)
        plt.scatter(subset[x_col], subset['TradeTotBCTrans'],
                    c=codes, cmap=cm.rainbow, alpha=alpha)
        plt.xlabel(x_label)
        plt.ylabel('Transformed Trade Volume')
        plt.title(product_type)

        # Legend patches sample the colormap at evenly spaced points, which is
        # where the scatter's min-max scaling puts the integer codes.
        shades = np.linspace(0, 1, len(names))
        handles = [mpatches.Patch(color=cm.rainbow(shade, alpha=alpha), label=name)
                   for shade, name in zip(shades, names)]
        plt.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc=2,
                   borderaxespad=0.)

    plt.savefig(out_file, bbox_inches='tight')


trans = .7  # opacity shared by markers and legend patches

# Transformed volume against Day, one panel per product type.
_plot_product_type_grid(df, 'Day', 'Day', (25, 10),
                        'strata_day_noMat.png', alpha=trans)

# Transformed volume against DayofMonth, one panel per product type.
_plot_product_type_grid(df, 'DayofMonth', 'Day of Month', (22, 10),
                        'strata_day_of_mo_noMat.png', alpha=trans)
import matplotlib.pyplot as plt
# Spot-check of the raw (non-aggregated) rows: narrow to product type 'Agr',
# then to product 'ZC', looking at the frame after each step.
subset = dataset[dataset['ProductType'] == 'Agr']
subset.head()
subset = subset[subset['ProductName'] == 'ZC']
subset.tail()

# Number of distinct product names in the raw data.
np.unique(dataset['ProductName']).size
# Plots highlighting maturities: one panel per product, points coloured by
# Maturity.
def _plot_maturity_grid(data, x_col, x_label, out_file, alpha=.5,
                        stop_after_type=None, n_cols=2):
    """Save a grid of scatter plots, one panel per product.

    Products are visited product type by product type (both in sorted order).
    Each panel plots ``TradeTotBCTrans`` against ``x_col`` for one
    ``ProductName``; points are coloured by ``Maturity`` and a legend listing
    the maturities is placed to the right of the panel.

    Parameters
    ----------
    data : DataFrame with the columns ``ProductType``, ``ProductName``,
        ``Maturity``, ``TradeTotBCTrans`` and ``x_col``.
    x_col : name of the column plotted on the x axis.
    x_label : text used as the x-axis label.
    out_file : path the finished figure is saved to.
    alpha : opacity used for both the markers and the legend patches.
    stop_after_type : if given, no product type after this one is plotted.
    n_cols : number of panels per grid row.
    """
    # Collect (product name, rows) for every panel first so the grid can be
    # sized from the real panel count.
    panels = []
    for product_type in np.unique(data.ProductType):
        of_type = data.loc[data.ProductType == product_type]
        for product_name in np.unique(of_type.ProductName):
            panels.append(
                (product_name, of_type.loc[of_type.ProductName == product_name]))
        if product_type == stop_after_type:
            break

    # Keep the 12-row layout for up to 12 * n_cols products and add rows only
    # when there are more, so plt.subplot never gets an out-of-range index.
    n_rows = max(12, -(-len(panels) // n_cols))

    plt.figure(figsize=(15, 50))
    # Spacing is a property of the figure, so it is set once up front.
    plt.subplots_adjust(hspace=.5, wspace=.5)

    for panel, (product_name, rows) in enumerate(panels, 1):
        # Sorting only affects the order in which overlapping markers are drawn.
        rows = rows.sort_values(by=x_col)
        # `maturities` holds the sorted distinct values; `codes` gives, for
        # every row, the position of its maturity in that array.
        maturities, codes = np.unique(rows.Maturity.values, return_inverse=True)

        plt.subplot(n_rows, n_cols, panel)
        plt.scatter(rows[x_col], rows['TradeTotBCTrans'],
                    c=codes, cmap=cm.rainbow, alpha=alpha)
        plt.xlabel(x_label)
        plt.ylabel('Transformed Trade Volume')
        plt.title(product_name)

        # Legend patches sample the colormap at evenly spaced points, which is
        # where the scatter's min-max scaling puts the integer codes.
        shades = np.linspace(0, 1, len(maturities))
        handles = [mpatches.Patch(color=cm.rainbow(shade, alpha=alpha), label=maturity)
                   for shade, maturity in zip(shades, maturities)]
        plt.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc=2,
                   borderaxespad=0.)

    plt.savefig(out_file, bbox_inches='tight')


trans = .5  # opacity shared by markers and legend patches

# NOTE(review): nothing visible in this script adds 'TradeTotBCTrans' to
# `dataset` (only `df` receives it) - presumably the input file already
# carries that column at maturity level; confirm.
_plot_maturity_grid(dataset, 'Day', 'Day',
                    'strata_day_Mat.png', alpha=trans)
_plot_maturity_grid(dataset, 'DayofMonth', 'Day of Month',
                    'strata_dayOfMo_Mat.png', alpha=trans)
_plot_maturity_grid(dataset, 'TimeToMaturity', 'TimeToMaturity',
                    'strata_timeToMat.png', alpha=trans)

# Same time-to-maturity plot, cut off after the 'Agr' product type.
# NOTE(review): the export lost the indentation of the original
# `if i=="Agr": break`; it is read here as belonging to the product-type
# loop (all 'Agr' products are drawn, later types are skipped) - confirm.
_plot_maturity_grid(dataset, 'TimeToMaturity', 'TimeToMaturity',
                    'strata_timeToMat2.png', alpha=trans,
                    stop_after_type='Agr')
# One histogram of the Box-Cox transformed volume per product type, laid out
# on a 2 x 3 grid and saved as a single image.
plt.figure(figsize=(30, 12))
plt.subplots_adjust(hspace=.3, wspace=.2)
for position, product_type in enumerate(np.unique(df.ProductType), 1):
    type_rows = df.loc[df.ProductType == product_type]
    plt.subplot(2, 3, position)
    plt.hist(type_rows['TradeTotBCTrans'])
    plt.title(product_type + " Transformed Trade Volume")
plt.savefig('strata_types_hist.png', bbox_inches='tight')